library(ggpubr)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 methods overwritten by 'car':
  method                          from
  influence.merMod                lme4
  cooks.distance.influence.merMod lme4
  dfbeta.influence.merMod         lme4
  dfbetas.influence.merMod        lme4
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
options(na.action = "na.fail") 
source("./dredge_functions.R")
dredge_and_subset <- function(data) {
  model <- lm(
    response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
    data=data
  )
  dredge_result <- dredge(model)
  summary(model.avg(dredge_result, subset = delta < 10))
}
load_niche_data <- function() {
  filename <- 'species_analysis_cache.csv'
  
  if (!file.exists(filename)) {
    
    sql <- "
        SELECT 
          niche_name,
          foraging_niche, 
          trophic_niche, 
          is_noctural, 
          total_species,
          merlin_10_perc_ratio, 
          merlin_20_perc_ratio,
          merlin_ever_perc_ratio,
          birdlife_10_perc_ratio, 
          birdlife_20_perc_ratio,
          birdlife_ever_perc_ratio,
          either_10_perc_ratio, 
          either_20_perc_ratio, 
          either_ever_perc_ratio,
          both_10_perc_ratio, 
          both_20_perc_ratio,
          both_ever_perc_ratio,
          body_morphspace.pc1.mean AS pc1, 
          body_morphspace.pc2.mean AS pc2, 
          body_morphspace.pc3.mean AS pc3, 
          body_morphspace.pc4.mean AS pc4
        FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000 
    "
  
    tb <- bq_project_query(projectid, sql)

    data <- bq_table_download(tb)
    write_csv(data, filename)
  }
  
  data <- read_csv(filename)
  
  data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
  
  data$foraging_niche = as.factor(data$foraging_niche)
  data$trophic_niche = as.factor(data$trophic_niche)
  data$is_nocturnal = as.factor(data$is_noctural)
  
  data
}

data_for_response <- function(column_name_for_response) {
  data <- load_niche_data()
  names(data)[names(data) == column_name_for_response] <- "response"
  
  data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
0.25 Percentile - 10% of species
merlin_10_data <- data_for_response('merlin_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_10_data
merlin_10_result <- dredge_and_subset(merlin_10_data)
Fixed term is "(Intercept)"
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
Fixed term is "(Intercept)"
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
both_10_data <- data_for_response('both_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
both_10_result <- dredge_and_subset(both_10_data)
Fixed term is "(Intercept)"
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
either_10_data <- data_for_response('either_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
either_10_result <- dredge_and_subset(either_10_data)
Fixed term is "(Intercept)"
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
Full result for 10% of species
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
0.75 Percentile - 20% of species
merlin_20_data <- data_for_response('merlin_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_20_result <- dredge_and_subset(merlin_20_data)
Fixed term is "(Intercept)"
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
Fixed term is "(Intercept)"
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
both_20_data <- data_for_response('both_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
both_20_result <- dredge_and_subset(both_20_data)
Fixed term is "(Intercept)"
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
either_20_data <- data_for_response('either_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
either_20_result <- dredge_and_subset(either_20_data)
Fixed term is "(Intercept)"
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
Full result for 10% of species
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
Comparing the 2 percentiles
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
Warning: position_stack requires non-overlapping x intervals

bind_sets <- function(first_percentile, second_percentile) {
  first_percentile$percentile <- "Top 10%"
  second_percentile$percentile <- "Top 20%"
  rbind(first_percentile, second_percentile)
}

merlin_species_data <- bind_sets(merlin_10_data,  merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data,  birdlife_20_data)
either_species_data <- bind_sets(either_10_data,  either_20_data)
both_species_data <- bind_sets(both_10_data,  both_20_data)
diff_set <- function(first_percentile, second_percentile) {
  second_percentile$response_20 <- second_percentile$response
  
  result <- right_join(first_percentile, second_percentile[,c("niche_name", "response_20")], by = c("niche_name"))
  result$diff <- result$response_20 - result$response
  result$increase <- result$diff / result$response_20
  result$increase[is.na(result$increase)] = 0
  result$response_10 <- result$response
  result[,c("response_10", "response_20", "diff", "increase", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}

merlin_diff_data <- diff_set(merlin_10_data,  merlin_20_data)
birdlife_diff_data <- diff_set(birdlife_10_data,  birdlife_20_data)
either_diff_data <- diff_set(either_10_data,  either_20_data)
both_diff_data <- diff_set(both_10_data,  both_20_data)

merlin_diff_data
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9  1.762  0.1958   1.679 0.0943 .
Residuals     258 30.084  0.1166                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9  1.762  0.1958   1.679 0.0943 .
Residuals     258 30.084  0.1166                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                          Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                 1  1.508  1.5077  69.821 4.05e-15 ***
trophic_niche:percentile   9  1.177  0.1308   6.057 1.03e-07 ***
Residuals                258  5.571  0.0216                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties

    Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  merlin_diff_data$increase and merlin_diff_data$trophic_niche 

                      Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore Scavenger
Frugivore             0.5331           -         -         -                 -                     -           -           -        -        
Granivore             1.0000           1.0000    -         -                 -                     -           -           -        -        
Herbivore aquatic     1.0000           0.1167    1.0000    -                 -                     -           -           -        -        
Herbivore terrestrial 1.0000           1.0000    1.0000    1.0000            -                     -           -           -        -        
Invertivore           0.0076           1.0000    1.0000    0.0051            1.0000                -           -           -        -        
Nectarivore           1.0000           1.0000    1.0000    1.0000            1.0000                1.0000      -           -        -        
Omnivore              0.2684           1.0000    1.0000    0.0679            1.0000                1.0000      1.0000      -        -        
Scavenger             1.0000           1.0000    1.0000    1.0000            1.0000                1.0000      1.0000      1.0000   -        
Vertivore             1.0000           1.0000    1.0000    0.5929            1.0000                1.0000      1.0000      1.0000   1.0000   

P value adjustment method: holm 
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)
               Df Sum Sq Mean Sq F value   Pr(>F)    
trophic_niche   9  3.941  0.4379   3.689 0.000229 ***
Residuals     258 30.629  0.1187                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
merlin_increase_tropic_niche_tukey
     Aquatic predator             Frugivore             Granivore     Herbivore aquatic Herbivore terrestrial           Invertivore           Nectarivore              Omnivore 
                 "bc"                  "ab"                  "ac"                   "c"                  "ac"                   "a"                  "ac"                  "ab" 
            Scavenger             Vertivore 
                 "ac"                  "ac" 
plot(merlin_increase_tropic_niche_tukey)

with.tukey.label.as.group <- function(tukey, dataset, join_by) {
  labels <- data.frame(tukey$mcletters$Letters)
  labels$category <- rownames(labels)
  colnames(labels) <- c("group", "category")
  
  
  left_join(dataset, labels, by = join_by)
}
merlin_diff_data1 <- with.tukey.label.as.group(merlin_increase_tropic_niche_tukey, merlin_diff_data, c("trophic_niche" = "category"))
merlin_diff_data1
table(merlin_diff_data1$trophic_niche)

     Aquatic predator             Frugivore             Granivore     Herbivore aquatic Herbivore terrestrial           Invertivore           Nectarivore              Omnivore 
                   58                    25                    10                    15                     9                    74                    11                    34 
            Scavenger             Vertivore 
                    3                    29 
ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()

ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche   9  1.696  0.1885   1.605  0.114
Residuals     258 30.292  0.1174               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche   9  1.696  0.1885   1.605  0.114
Residuals     258 30.292  0.1174               

Error: Within
                          Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                 1  1.327  1.3274  64.018 4.19e-14 ***
trophic_niche:percentile   9  0.540  0.0600   2.893  0.00282 ** 
Residuals                258  5.350  0.0207                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9   2.21  0.2453   1.791 0.0703 .
Residuals     258  35.35  0.1370                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
birdlife_increase_tropic_niche_tukey
     Aquatic predator             Frugivore             Granivore     Herbivore aquatic Herbivore terrestrial           Invertivore           Nectarivore              Omnivore 
                  "b"                  "ab"                  "ab"                  "ab"                  "ab"                   "a"                  "ab"                  "ab" 
            Scavenger             Vertivore 
                 "ab"                  "ab" 
birdlife_diff_data1 <- with.tukey.label.as.group(birdlife_increase_tropic_niche_tukey, birdlife_diff_data, c("trophic_niche" = "category"))
ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()

ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

nrow(merlin_10_data[merlin_10_data$response > 0,])
[1] 96
nrow(merlin_20_data[merlin_20_data$response > 0,])
[1] 128
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
[1] 87
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
[1] 124
nrow(either_10_data[either_10_data$response > 0,])
[1] 94
nrow(either_20_data[either_20_data$response > 0,])
[1] 123
nrow(both_10_data[both_10_data$response > 0,])
[1] 91
nrow(both_20_data[both_20_data$response > 0,])
[1] 128
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)

merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche  32  4.894  0.1529   1.333  0.118
Residuals      235 26.952  0.1147               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche  32  4.894  0.1529   1.333  0.118
Residuals      235 26.952  0.1147               

Error: Within
                           Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                  1  1.508  1.5077  68.706 8.81e-15 ***
foraging_niche:percentile  32  1.591  0.0497   2.266 0.000275 ***
Residuals                 235  5.157  0.0219                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)  
foraging_niche  32  5.519  0.1725   1.531 0.0401 *
Residuals      235 26.470  0.1126                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)  
foraging_niche  32  5.519  0.1725   1.531 0.0401 *
Residuals      235 26.470  0.1126                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                           Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                  1  1.327  1.3274  67.870 1.22e-14 ***
foraging_niche:percentile  32  1.293  0.0404   2.066  0.00118 ** 
Residuals                 235  4.596  0.0196                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pc_axis_figure <- function(dataset) {
  annotate_figure(
ggarrange(
  ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
pc_axis_figure(merlin_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).

library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc1               1 0.00042 0.00042  0.0169
percentile        1 1.50774 1.50774 59.9817
pc1:percentile    1 0.06215 0.06215  2.4724
summary(merlin_pc1_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc1 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -8.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4461 -0.5197  0.0155  0.1840  3.3433 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04729  0.2175  
 Residual               0.02514  0.1585  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                       Estimate Std. Error t value
(Intercept)            0.117739   0.022322   5.275
pc1                   -0.005011   0.006481  -0.773
percentileTop 20%      0.086293   0.018597   4.640
pc1:percentileTop 20%  0.008490   0.005399   1.572

Correlation of Fixed Effects:
            (Intr) pc1    prT20%
pc1         -0.676              
prcntlTp20% -0.417  0.282       
pc1:prcT20%  0.282 -0.417 -0.676
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc2               1 0.01486 0.01486  0.5864
percentile        1 1.50774 1.50774 59.5009
pc2:percentile    1 0.00812 0.00812  0.3203
summary(merlin_pc2_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc2 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -11.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3691 -0.5150  0.0879  0.1354  3.3292 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04706  0.2169  
 Residual               0.02534  0.1592  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                       Estimate Std. Error t value
(Intercept)            0.104389   0.016836   6.200
pc2                   -0.008532   0.018597  -0.459
percentileTop 20%      0.104347   0.014086   7.408
pc2:percentileTop 20% -0.008805   0.015559  -0.566

Correlation of Fixed Effects:
            (Intr) pc2    prT20%
pc2          0.217              
prcntlTp20% -0.418 -0.091       
pc2:prcT20% -0.091 -0.418  0.217
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc3               1 0.00044 0.00044  0.0187
percentile        1 1.50774 1.50774 63.7963
pc3:percentile    1 0.46195 0.46195 19.5464
summary(merlin_pc3_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc3 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -30.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.8609 -0.4057 -0.0338  0.2719  3.7473 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04804  0.2192  
 Residual               0.02363  0.1537  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                      Estimate Std. Error t value
(Intercept)            0.10758    0.01638   6.568
pc3                   -0.04333    0.02594  -1.670
percentileTop 20%      0.10282    0.01330   7.730
pc3:percentileTop 20%  0.09314    0.02107   4.421

Correlation of Fixed Effects:
            (Intr) pc3    prT20%
pc3         -0.055              
prcntlTp20% -0.406  0.022       
pc3:prcT20%  0.022 -0.406 -0.055
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc4               1 0.06576 0.06576  2.6061
percentile        1 1.50774 1.50774 59.7558
pc4:percentile    1 0.03687 0.03687  1.4612
summary(merlin_pc4_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc4 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -17

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4101 -0.5034  0.0731  0.1400  3.4938 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04666  0.2160  
 Residual               0.02523  0.1588  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                      Estimate Std. Error t value
(Intercept)            0.10678    0.01640   6.513
pc4                    0.03400    0.03543   0.960
percentileTop 20%      0.10683    0.01374   7.777
pc4:percentileTop 20%  0.03588    0.02969   1.209

Correlation of Fixed Effects:
            (Intr) pc4    prT20%
pc4          0.046              
prcntlTp20% -0.419 -0.019       
pc4:prcT20% -0.019 -0.419  0.046
pc_axis_figure(birdlife_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).

ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)  
is_nocturnal   1  0.462  0.4615   3.912  0.049 *
Residuals    266 31.384  0.1180                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)  
is_nocturnal   1  0.462  0.4615   3.912  0.049 *
Residuals    266 31.384  0.1180                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                         Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                1  1.508  1.5077  60.685 1.51e-13 ***
is_nocturnal:percentile   1  0.140  0.1396   5.618   0.0185 *  
Residuals               266  6.609  0.0248                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal   1   0.20  0.1982   1.658  0.199
Residuals    266  31.79  0.1195               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal   1   0.20  0.1982   1.658  0.199
Residuals    266  31.79  0.1195               

Error: Within
                         Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                1  1.327  1.3274  60.370 1.72e-13 ***
is_nocturnal:percentile   1  0.041  0.0407   1.852    0.175    
Residuals               266  5.849  0.0220                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Investigation of percentiles and presence
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).

merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
Analysis of Variance Table
                         npar  Sum Sq Mean Sq F value
total_species               1 0.00052 0.00052  0.0206
percentile                  1 1.50774 1.50774 59.4316
total_species:percentile    1 0.00026 0.00026  0.0103
summary(merlin_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: 8.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3523 -0.5462  0.1109  0.1216  3.2582 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04717  0.2172  
 Residual               0.02537  0.1593  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                                  Estimate Std. Error t value
(Intercept)                      1.064e-01  1.690e-02   6.295
total_species                   -1.420e-05  1.612e-04  -0.088
percentileTop 20%                1.064e-01  1.414e-02   7.527
total_species:percentileTop 20% -1.367e-05  1.348e-04  -0.101

Correlation of Fixed Effects:
            (Intr) ttl_sp prT20%
total_specs -0.229              
prcntlTp20% -0.418  0.096       
ttl_sp:T20%  0.096 -0.418 -0.229
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
Analysis of Variance Table
                         npar Sum Sq Mean Sq F value
total_species               1 0.0000  0.0000  0.0001
percentile                  1 1.3274  1.3274 59.9531
total_species:percentile    1 0.0000  0.0000  0.0001
summary(birdlife_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
   Data: birdlife_species_data

REML criterion at convergence: -26.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.5919 -0.5189  0.0633  0.1497  3.4600 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04906  0.2215  
 Residual               0.02214  0.1488  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                                 Estimate Std. Error t value
(Intercept)                     9.946e-02  1.675e-02   5.939
total_species                   9.392e-07  1.597e-04   0.006
percentileTop 20%               9.949e-02  1.321e-02   7.534
total_species:percentileTop 20% 1.518e-06  1.260e-04   0.012

Correlation of Fixed Effects:
            (Intr) ttl_sp prT20%
total_specs -0.229              
prcntlTp20% -0.394  0.090       
ttl_sp:T20%  0.090 -0.394 -0.229
merlin_species_data$present <- merlin_species_data$response > 0

merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
Analysis of Variance Table
                              npar Sum Sq Mean Sq  F value
log(total_species)               1 7.6804  7.6804 145.5124
percentile                       1 1.9104  1.9104  36.1952
log(total_species):percentile    1 0.0496  0.0496   0.9393
summary(merlin_present_model)
Linear mixed model fit by REML ['lmerMod']
Formula: present ~ log(total_species) * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: 410.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.25242 -0.44048  0.02734  0.24296  2.14911 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.11259  0.3356  
 Residual               0.05278  0.2297  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                                     Estimate Std. Error t value
(Intercept)                           0.10521    0.03325   3.164
log(total_species)                    0.19281    0.01685  11.446
percentileTop 20%                     0.13652    0.02657   5.139
log(total_species):percentileTop 20% -0.01304    0.01346  -0.969

Correlation of Fixed Effects:
            (Intr) lg(t_) prT20%
lg(ttl_spc) -0.665              
prcntlTp20% -0.399  0.266       
lg(t_):T20%  0.266 -0.399 -0.665
assign_niche_appearance_factor <- function(dataset) {
  dataset$niche_appearance = '> 20%'
  dataset$niche_appearance[dataset$response_20 > 0] = '10% - 20%'
  dataset$niche_appearance[dataset$response_10 > 0] = '< 10%'
  
  dataset$niche_appearance <- factor(dataset$niche_appearance, levels = c("< 10%", "10% - 20%", "> 20%"))
  dataset
}
merlin_diff_data <- assign_niche_appearance_factor(merlin_diff_data)
ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()

load_ever_data <- function(pool) {
  response_name <- paste(pool, 'ever', 'perc', 'ratio', sep = '_')
  dataset <- data_for_response(response_name)
  dataset$present <- 'Yes'
  dataset$present[dataset$response == 0] <- 'No'
  dataset$present <- factor(dataset$present, levels = c("Yes", "No"))
  dataset
}
merlin_ever_data <- load_ever_data('merlin')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_ever_data
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()

birdlife_diff_data <- assign_niche_appearance_factor(birdlife_diff_data)
ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()

birdlife_ever_data <- load_ever_data('birdlife')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()

PCA Variation with presence
ggplot(merlin_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Merlin Regional Pool")
ggsave("species_analysis_pc1_merlin.jpg")
Saving 7.29 x 4.51 in image

ggplot(merlin_diff_data, aes(color = niche_appearance, x = pc1, y = trophic_niche)) + 
    geom_violin()
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.

ggplot(birdlife_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Birdlife Regional Pool")
ggsave("species_analysis_pc1_birdlife.jpg")
First attempt figures

  1. Niches recorded present in at least one city.
  2. Given species ranked by percentage of cities populated given they are present in regional pool; This shows niches present with the top 10% of the most urbanised species (red), the top 20% of the most urbanised species (red + green), and then the remaining niches (blue). This approximates to the niches that would be present in the bottom 25% performing hotspots (red), the bottom 75% performing hotspots (red + green), and then the niches at the highest performing hotspots or niches that are never present (blue).

  1. The redundancy/fullness of the niches that are present in cities with 10% of the most urbanised species, and the redundancy/fullness of the niches that are present in cities with 20% of the most urbanised species.
  2. The percentage increase in redundancy/fullness of niches between the top 10% of most urbanised species and the top 20% of most urbanised species, grouped using Tukey based on an anova of the percentage increase given the tropic niche. This shows the amount of redundancy that is increased from the bottom 25% performing hotspots (hotspots with 10% of regional pool) and the bottom 75% of performing hotspots (hotspots with 20% of regional pool).
jpeg("species_plot1.jpg", width = 800, height = 600)
species_plot1
dev.off()

jpeg("species_plot2.jpg", width = 800, height = 600)
species_plot2
dev.off()
Trophic Niche Accumulation
merlin_accumulation = ggplot(accumulation, aes(x = percent, y = merlin_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = merlin_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(merlin_niche_count == max(merlin_niche_count)), aes(yintercept = merlin_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Merlin Niche Accumulation")
Warning: Ignoring unknown parameters: na.value
merlin_accumulation

birdlife_accumulation = ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Birdlife Niche Accumulation")
Warning: Ignoring unknown parameters: na.value
birdlife_accumulation

annotate_figure(
  ggarrange(merlin_accumulation + rremove("xlab") + rremove("ylab"), 
            birdlife_accumulation + rremove("xlab") + rremove("ylab"),
            ncol = 2, label.x = 0),
left = text_grob("Number of Niches Present", rot = 90),
bottom = text_grob("Percentage of all Species - ranked by urbanisation"))


ggsave("trophic_niche_accumlation.jpg")
Saving 7.29 x 4.51 in image

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(stringr)
library(tidyverse)
library(bigrquery)
library(MuMIn)
library(ggplot2)

library(ggpubr)
```

```{r}
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
```

```{r}
options(na.action = "na.fail") 
```

```{r}
source("./dredge_functions.R")
```

```{r}
dredge_and_subset <- function(data) {
  model <- lm(
    response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
    data=data
  )
  dredge_result <- dredge(model)
  summary(model.avg(dredge_result, subset = delta < 10))
}
```

```{r}
load_niche_data <- function() {
  filename <- 'species_analysis_cache.csv'
  
  if (!file.exists(filename)) {
    
    sql <- "
        SELECT 
          niche_name,
          foraging_niche, 
          trophic_niche, 
          is_noctural, 
          total_species,
          merlin_10_perc_ratio, 
          merlin_20_perc_ratio,
          merlin_ever_perc_ratio,
          birdlife_10_perc_ratio, 
          birdlife_20_perc_ratio,
          birdlife_ever_perc_ratio,
          either_10_perc_ratio, 
          either_20_perc_ratio, 
          either_ever_perc_ratio,
          both_10_perc_ratio, 
          both_20_perc_ratio,
          both_ever_perc_ratio,
          body_morphspace.pc1.mean AS pc1, 
          body_morphspace.pc2.mean AS pc2, 
          body_morphspace.pc3.mean AS pc3, 
          body_morphspace.pc4.mean AS pc4
        FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000 
    "
  
    tb <- bq_project_query(projectid, sql)

    data <- bq_table_download(tb)
    write_csv(data, filename)
  }
  
  data <- read_csv(filename)
  
  data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
  
  data$foraging_niche = as.factor(data$foraging_niche)
  data$trophic_niche = as.factor(data$trophic_niche)
  data$is_nocturnal = as.factor(data$is_noctural)
  
  data
}

data_for_response <- function(column_name_for_response) {
  data <- load_niche_data()
  names(data)[names(data) == column_name_for_response] <- "response"
  
  data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
```

---------------------------------
0.25 Percentile - 10% of species
---------------------------------

```{r}
merlin_10_data <- data_for_response('merlin_10_perc_ratio')
merlin_10_data
```


```{r}
merlin_10_result <- dredge_and_subset(merlin_10_data)
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
```

```{r}
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
```


```{r}
both_10_data <- data_for_response('both_10_perc_ratio')
both_10_result <- dredge_and_subset(both_10_data)
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
```


```{r}
either_10_data <- data_for_response('either_10_perc_ratio')
either_10_result <- dredge_and_subset(either_10_data)
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
```

---------------------------------
0.75 Percentile - 20% of species
---------------------------------

```{r}
merlin_20_data <- data_for_response('merlin_20_perc_ratio')
merlin_20_result <- dredge_and_subset(merlin_20_data)
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
```

```{r}
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
```

```{r}
both_20_data <- data_for_response('both_20_perc_ratio')
both_20_result <- dredge_and_subset(both_20_data)
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
```

```{r}
either_20_data <- data_for_response('either_20_perc_ratio')
either_20_result <- dredge_and_subset(either_20_data)
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
```

------------------------------
Comparing the 2 percentiles
------------------------------

```{r}
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
```

```{r}
bind_sets <- function(first_percentile, second_percentile) {
  first_percentile$percentile <- "Top 10%"
  second_percentile$percentile <- "Top 20%"
  rbind(first_percentile, second_percentile)
}

merlin_species_data <- bind_sets(merlin_10_data,  merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data,  birdlife_20_data)
either_species_data <- bind_sets(either_10_data,  either_20_data)
both_species_data <- bind_sets(both_10_data,  both_20_data)
```

```{r}
diff_set <- function(first_percentile, second_percentile) {
  second_percentile$response_20 <- second_percentile$response
  
  result <- right_join(first_percentile, second_percentile[,c("niche_name", "response_20")], by = c("niche_name"))
  result$diff <- result$response_20 - result$response
  result$increase <- result$diff / result$response_20
  result$increase[is.na(result$increase)] = 0
  result$response_10 <- result$response
  result[,c("response_10", "response_20", "diff", "increase", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}

merlin_diff_data <- diff_set(merlin_10_data,  merlin_20_data)
birdlife_diff_data <- diff_set(birdlife_10_data,  birdlife_20_data)
either_diff_data <- diff_set(either_10_data,  either_20_data)
both_diff_data <- diff_set(both_10_data,  both_20_data)

merlin_diff_data
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)

merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)
```

```{r}
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
```
```{r}
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)

merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

merlin_increase_tropic_niche_tukey
```

```{r}
plot(merlin_increase_tropic_niche_tukey)
```
```{r}
with.tukey.label.as.group <- function(tukey, dataset, join_by) {
  labels <- data.frame(tukey$mcletters$Letters)
  labels$category <- rownames(labels)
  colnames(labels) <- c("group", "category")
  
  
  left_join(dataset, labels, by = join_by)
}
```

```{r}
merlin_diff_data1 <- with.tukey.label.as.group(merlin_increase_tropic_niche_tukey, merlin_diff_data, c("trophic_niche" = "category"))
merlin_diff_data1
```
```{r}
table(merlin_diff_data1$trophic_niche)
```

```{r}
ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)

birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)
```

```{r}
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)

birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

birdlife_increase_tropic_niche_tukey
```

```{r}
birdlife_diff_data1 <- with.tukey.label.as.group(birdlife_increase_tropic_niche_tukey, birdlife_diff_data, c("trophic_niche" = "category"))
ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
nrow(merlin_10_data[merlin_10_data$response > 0,])
nrow(merlin_20_data[merlin_20_data$response > 0,])
```

```{r}
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
```

```{r}
nrow(either_10_data[either_10_data$response > 0,])
nrow(either_20_data[either_20_data$response > 0,])
```

```{r}
nrow(both_10_data[both_10_data$response > 0,])
nrow(both_20_data[both_20_data$response > 0,])
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)
```

```{r}
merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)

merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)
```



```{r}
ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)

birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)
```


```{r}
pc_axis_figure <- function(dataset) {
  annotate_figure(
ggarrange(
  ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
```


```{r}
pc_axis_figure(merlin_species_data)
```

```{r}
library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
summary(merlin_pc1_model)
```

```{r}
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
summary(merlin_pc2_model)
```

```{r}
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
summary(merlin_pc3_model)
```

```{r}
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
summary(merlin_pc4_model)
```

```{r}
pc_axis_figure(birdlife_species_data)
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)

merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)

birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)
```

-----------------------------------------
Investigation of percentiles and presence
-----------------------------------------

```{r}
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
```

```{r}
merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
summary(merlin_species_count_model)
```

```{r}
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
summary(birdlife_species_count_model)
```

```{r}
merlin_species_data$present <- merlin_species_data$response > 0
```

```{r}
ggplot(merlin_species_data, aes(x = log(total_species), y = present, color = percentile)) + geom_boxplot() + theme_bw()
```


```{r}
merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
summary(merlin_present_model)
```

```{r}
assign_niche_appearance_factor <- function(dataset) {
  dataset$niche_appearance = '> 20%'
  dataset$niche_appearance[dataset$response_20 > 0] = '10% - 20%'
  dataset$niche_appearance[dataset$response_10 > 0] = '< 10%'
  
  dataset$niche_appearance <- factor(dataset$niche_appearance, levels = c("< 10%", "10% - 20%", "> 20%"))
  dataset
}
```

```{r}
merlin_diff_data <- assign_niche_appearance_factor(merlin_diff_data)
```


```{r}
ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()
```

```{r}
load_ever_data <- function(pool) {
  response_name <- paste(pool, 'ever', 'perc', 'ratio', sep = '_')
  dataset <- data_for_response(response_name)
  dataset$present <- 'Yes'
  dataset$present[dataset$response == 0] <- 'No'
  dataset$present <- factor(dataset$present, levels = c("Yes", "No"))
  dataset
}
```

```{r}
merlin_ever_data <- load_ever_data('merlin')
merlin_ever_data
```

```{r}
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()
```
```{r}
birdlife_diff_data <- assign_niche_appearance_factor(birdlife_diff_data)
```


```{r}
ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()
```


```{r}
birdlife_ever_data <- load_ever_data('birdlife')
```

```{r}
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()
```

----------------------------------
PCA Variation with presence
----------------------------------


```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Merlin Regional Pool")
ggsave("species_analysis_pc1_merlin.jpg")
```
```{r}
ggplot(merlin_diff_data, aes(color = niche_appearance, x = pc1, y = trophic_niche)) + 
    geom_violin()
```


```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc2, y = trophic_niche)) + 
    geom_violin()
```
```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc3, y = trophic_niche)) + 
    geom_violin()
```
```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc4, y = trophic_niche)) + 
    geom_violin()
```


```{r}
ggplot(birdlife_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Birdlife Regional Pool")
ggsave("species_analysis_pc1_birdlife.jpg")
```
----------------------------------
First attempt figures
----------------------------------

```{r}
theme <-  theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(0.5,"line"))
guide_ever <- guides(fill=guide_legend(title="Present in any city?"))
guide_percentile <- guides(fill=guide_legend(title="Top X% of ranked species", nrow = 3))

species_plot1 <- annotate_figure(
  ggarrange(
    ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + geom_bar() + theme + guide_ever + rremove("xlab") + rremove("ylab") + labs(title = "Merlin"),
    ggplot(birdlife_ever_data, aes(fill=present, y=trophic_niche)) + geom_bar() + theme + guide_ever + rremove("xlab") + rremove("ylab") + labs(title = "Birdlife"),
    ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) +  geom_bar()+ theme + guide_percentile + rremove("xlab") + rremove("ylab"),
    ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + geom_bar() + theme + guide_percentile + rremove("xlab") + rremove("ylab"),
    nrow = 2, ncol = 2, common.legend = F, label.x = 0, labels = c("a)", "", "b)", "")),
left = text_grob("Trophic Niche", rot = 90),
bottom = text_grob("Niche Count"))

species_plot1
```
a) Niches recorded present in at least one city.
b) Given species ranked by percentage of cities populated given they are present in regional pool; This shows niches present with the top 10% of the most urbanised species (red), the top 20% of the most urbanised species (red + green), and then the remaining niches (blue). This approximates to the niches that would be present in the bottom 25% performing hotspots (red), the bottom 75% performing hotspots (red + green), and then the niches at the highest performing hotspots or niches that are never present (blue).

```{r}
guide_percentile_2 <- guides(color=guide_legend(title="Percentile"))
guide_increase <- guides(color=guide_legend(title="Tukey response group", nrow = 2))

species_plot2 <- annotate_figure(
  ggarrange(
    ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme  + guide_percentile_2 + xlab("Fullness") + rremove("ylab") + labs(title = "Merlin"),
    ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme + guide_percentile_2 + xlab("Fullness") + rremove("ylab") + labs(title = "Birdlife"),
    ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme + guide_increase + xlab("Increase in fullness") + rremove("ylab"),
    ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme + guide_increase + xlab("Increase in fullness") + rremove("ylab"), 
    nrow = 2, ncol = 2, common.legend = F, label.x = 0, labels = c("c)", "", "d)", "")),
left = text_grob("Trophic Niche", rot = 90))

species_plot2
```
c) The redundancy/fullness of the niches that are present in cities with 10% of the most urbanised species, and the redundancy/fullness of the niches that are present in cities with 20% of the most urbanised species.
d) The percentage increase in redundancy/fullness of niches between the top 10% of most urbanised species and the top 20% of most urbanised species, grouped using Tukey based on an anova of the percentage increase given the tropic niche. This shows the amount of redundancy that is increased from the bottom 25% performing hotspots (hotspots with 10% of regional pool) and the bottom 75% of performing hotspots (hotspots with 20% of regional pool).


```{r}
jpeg("species_plot1.jpg", width = 800, height = 600)
species_plot1
dev.off()

jpeg("species_plot2.jpg", width = 800, height = 600)
species_plot2
dev.off()
```

----------------------------------
Trophic Niche Accumulation
----------------------------------

```{r}
accumulation <- read_csv('trophic_niche_accumulation.csv')
accumulation
```
```{r}
merlin_accumulation = ggplot(accumulation, aes(x = percent, y = merlin_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = merlin_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(merlin_niche_count == max(merlin_niche_count)), aes(yintercept = merlin_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Merlin Niche Accumulation")

merlin_accumulation
```


```{r}
birdlife_accumulation = ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Birdlife Niche Accumulation")

birdlife_accumulation
```
```{r}
annotate_figure(
  ggarrange(merlin_accumulation + rremove("xlab") + rremove("ylab"), 
            birdlife_accumulation + rremove("xlab") + rremove("ylab"),
            ncol = 2, label.x = 0),
left = text_grob("Number of Niches Present", rot = 90),
bottom = text_grob("Percentage of all Species - ranked by urbanisation"))


ggsave("trophic_niche_accumlation.jpg")
```



